Get Buoy and Vessel data¶

In [1]:
# parameters

lit_data_url = "https://osis.geomar.de/underway/3ka2chb/api/v1/positions/Littorina2023?include_payloads=false&earliest_timestamp=2023-04-24"
lit_data_file = "lit_positions.csv"

buoy_zip_url = "https://cloud.geomar.de/s/tNRg9raGEK357e4/download"
buoy_zip_file = "buoy_positions.zip"
buoy_data_path = "data/"
buoy_data_path_full = "data/2023-05-03_Drifter_Filedrop/"
buoy_data_file = "buoy_positions.csv"

buoy_whitelist = ["D298", "D299", "D300", "D301", "D302", "D303", ]
In [2]:
!curl -o {buoy_zip_file} {buoy_zip_url}
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 2526k    0 2526k    0     0  13332      0 --:--:--  0:03:14 --:--:--   683
In [3]:
!mkdir -p {buoy_data_path}
In [4]:
!unzip -o -q {buoy_zip_file} -d {buoy_data_path}

Buoy positions¶

In [5]:
import pandas as pd
import hvplot.pandas

from pathlib import Path
In [6]:
def _try_reading_csv(file):
    try:
        return pd.read_csv(file)
    except pd.errors.EmptyDataError:
        return None


def load_all_buoy_csv_files(path=None):
    files = sorted(Path(path).glob("*.csv"))
    _dfs = [_try_reading_csv(f) for f in files]
    _dfs = [_df for _df in _dfs if _df is not None]
    df = pd.concat(_dfs, ignore_index=True)
    df["date_UTC"] = pd.to_datetime(df["date_UTC"])
    df["D_number"] = df["D_number"].astype(int).astype(str)
    df = df.set_index(["D_number", "date_UTC"])
    df = df.sort_index()
    df = df.drop_duplicates()
    df = df.reset_index()
    return df
In [7]:
%%time

df_buoys = load_all_buoy_csv_files(
    path=buoy_data_path_full,
)

df_buoys
CPU times: user 6.63 s, sys: 89.5 ms, total: 6.72 s
Wall time: 6.72 s
Out[7]:
D_number date_UTC Latitude Longitude U_speed_mps U_Dir_deg batteryState
0 290 2023-05-05 16:26:21 54.48506 11.13692 0.060656 -36.271504 GOOD
1 298 2023-04-20 10:04:49 54.32977 10.14909 NaN NaN GOOD
2 298 2023-04-20 10:09:52 54.32967 10.14905 0.037714 -167.775106 GOOD
3 298 2023-04-20 10:14:50 54.32969 10.14911 0.015074 59.369044 GOOD
4 298 2023-04-20 10:19:47 54.32991 10.14860 0.138800 -54.503193 GOOD
... ... ... ... ... ... ... ...
12224 303 2023-05-09 08:40:47 54.93622 10.73319 0.085953 23.571389 GOOD
12225 303 2023-05-09 08:45:46 54.93580 10.73306 0.158797 -171.313893 GOOD
12226 303 2023-05-09 09:20:47 54.93616 10.73313 0.019189 4.969256 GOOD
12227 303 2023-05-09 09:25:48 54.93605 10.73306 0.043317 -161.295998 GOOD
12228 303 2023-05-09 09:40:44 54.93613 10.73317 0.012674 36.949253 GOOD

12229 rows × 7 columns

In [8]:
df_buoys = df_buoys.where(df_buoys.D_number.apply(lambda num: f"D{num}" in buoy_whitelist)).dropna()
In [9]:
df_buoys.to_csv(buoy_data_file, index=False)
!head -n5 {buoy_data_file}
D_number,date_UTC,Latitude,Longitude,U_speed_mps,U_Dir_deg,batteryState
298,2023-04-20 10:09:52,54.32967,10.14905,0.0377143858748649,-167.775106305755,GOOD
298,2023-04-20 10:14:50,54.32969,10.14911,0.0150742341143873,59.3690436747781,GOOD
298,2023-04-20 10:19:47,54.32991,10.1486,0.138800023585503,-54.5031934910538,GOOD
298,2023-04-20 10:24:52,54.32973,10.14894,0.0978186034069362,131.237675749866,GOOD
In [10]:
df_buoys.date_UTC.max()
Out[10]:
Timestamp('2023-05-09 09:40:44')

Vessel positions¶

In [11]:
import geopandas
/tmp/ipykernel_339/1529612126.py:1: UserWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas will still use PyGEOS by default for now. To force to use and test Shapely 2.0, you have to set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:

import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas
In [12]:
def read_littorina_positions(url=None):
    df = geopandas.read_file(
        url
    )
    
    df["Longitude"] = df.geometry.apply(lambda p: p.x)
    df["Latitude"] = df.geometry.apply(lambda p: p.y)
    df = df.drop(columns=["geometry", ])
    
    # readacted positions
    redact_here = df.datastream.str.contains("MarineTraffic")
    df["Longitude_"] = (
        redact_here * df["Longitude"].round(1)
        + ~redact_here * df["Longitude"]
    )
    df["Latitude_"] = (
        redact_here * df["Latitude"].round(1)
        + ~redact_here * df["Latitude"]
    )
    
    return df
In [13]:
df_lit = read_littorina_positions(url=lit_data_url)
In [14]:
df_lit.to_csv(lit_data_file, index=False)
!head -n5 {lit_data_file}
id,context_shortname,datastream,import_info_id,import_time,obs_timestamp,platform_shortname,Longitude,Latitude,Longitude_,Latitude_
positions_for_context.12046da4-ab40-4f33-a47b-ea5583281213,Littorina2023,MarineTrafficConnector_gitlab,85d398b9-51ab-4a7a-841a-7fc3f4024060,2023-04-24 01:01:24.966000+00:00,2023-04-24 00:55:07+00:00,Littorina,10.18159,54.32806,10.2,54.3
positions_for_context.382a59b3-1cc1-4bdf-a959-3c765caf50e8,Littorina2023,MarineTrafficConnector_gitlab,2a6143de-7b8a-4197-8b34-90214a3a9fa1,2023-04-24 02:01:27.990000+00:00,2023-04-24 01:55:10+00:00,Littorina,10.18163,54.32806,10.2,54.3
positions_for_context.26e7b71c-9d4f-47c8-9337-b19673ae58d0,Littorina2023,MarineTrafficConnector_gitlab,17be6065-c029-46d3-b29c-c3d7284cbf5a,2023-04-24 03:01:10.069000+00:00,2023-04-24 02:55:14+00:00,Littorina,10.1816,54.32805,10.2,54.3
positions_for_context.ce96acdb-ca5f-46aa-b7b8-069c6bd19385,Littorina2023,MarineTrafficConnector_gitlab,8f51352e-debe-41db-9aae-95b1bad0d9cb,2023-04-24 04:01:13.361000+00:00,2023-04-24 03:58:09+00:00,Littorina,10.1816,54.32809,10.2,54.3